# -*- coding: utf-8 -*-
# @Time: 2024/11/2 14:11
# @Author: foxhuty
# @File: np_notes.py
import pandas as pd
import numpy as np


def _first_frame_with_column(frames, column):
    """Return the first DataFrame in ``frames`` that has ``column``."""
    for frame in frames:
        if column in frame.columns:
            return frame
    # IndexError matches what the former ``[...][0]`` lookup raised.
    raise IndexError(f'no sheet with a {column!r} column was found')


def get_mixed_data(file):
    """Merge the physics-track and history-track sheets into one ranked table.

    Only sheets named '物理类', '历史类' or '总表' are read. Among those, the
    first sheet that has a '物理' column and the first that has a '历史' column
    are stacked with ``pd.concat``, ordered by '总分' from highest to lowest,
    and given a 1-based '序号' column that follows that order.

    Args:
        file: Workbook path or file-like object accepted by ``pd.ExcelFile``.

    Returns:
        The combined DataFrame. Row labels from each sheet are kept, so index
        values may repeat across the two tracks.

    Raises:
        IndexError: If none of the candidate sheets has a '物理' column, or
            none has a '历史' column.
    """
    wanted_sheets = ('物理类', '历史类', '总表')

    # Open the workbook once, reuse the handle for every sheet, and release
    # it as soon as the sheets are loaded.
    with pd.ExcelFile(file) as excel_file:
        df_list = [
            # The two ID columns are read as strings rather than numbers.
            pd.read_excel(excel_file, sheet_name=sheet_name,
                          dtype={'考号': str, '考生号': str})
            for sheet_name in excel_file.sheet_names
            if sheet_name in wanted_sheets
        ]

    # NOTE(review): a sheet that carries both subject columns is selected for
    # both roles, in which case its rows appear twice in the result.
    data_physics = _first_frame_with_column(df_list, '物理')
    data_history = _first_frame_with_column(df_list, '历史')

    df_mixed = pd.concat([data_physics, data_history])
    df_mixed.sort_values(by='总分', ascending=False, inplace=True)
    df_mixed['序号'] = range(1, len(df_mixed) + 1)
    return df_mixed


def separate_data(file):
    """Split the combined score table into a history table and a physics table.

    The combined table comes from ``get_mixed_data(file)``. A row belongs to a
    track when its score for that track's subject ('历史' or '物理') is not
    null.

    Args:
        file: Workbook path or file-like object, forwarded to
            ``get_mixed_data``.

    Returns:
        tuple: ``(data_history, data_physics)``. The history table has the
        '物理' and '化学' columns removed; the physics table has the '历史'
        column removed. Both are new DataFrames.
    """
    data_mixed = get_mixed_data(file)

    # For numeric scores, "score >= column minimum" is true exactly for the
    # non-null rows, so test for non-null directly.
    has_history = data_mixed['历史'].notna()
    has_physics = data_mixed['物理'].notna()

    # drop() without inplace returns fresh frames, so nothing is mutated
    # through a filtered slice (the pattern pandas flags with
    # SettingWithCopyWarning).
    data_history = data_mixed[has_history].drop(columns=['物理', '化学'])
    data_physics = data_mixed[has_physics].drop(columns=['历史'])

    return data_history, data_physics


def get_subject_good_score(data, subject, total):
    """Return the subject cut-off score that matches a total-score threshold.

    The share of rows whose '总分' is at least ``total`` is applied to the
    number of non-null ``subject`` scores to obtain a head-count ``k``. The
    result is the k-th highest ``subject`` score. ``k`` and the result are
    also printed.

    Args:
        data: DataFrame with a '总分' column and a ``subject`` column. It is
            left unmodified.
        subject: Name of the subject column to evaluate.
        total: Minimum '总分' for a row to count towards the share.

    Returns:
        The lowest of the top ``k`` ``subject`` scores, or NaN when ``k`` is 0
        (including when ``data`` has no rows).
    """
    total_num = data.shape[0]
    good_total_num = int((data['总分'] >= total).sum())
    subject_num = int(data[subject].count())

    # Integer arithmetic keeps the head-count exact. The float form
    # int(count * good / total) can land one short, e.g. 100 * 0.29 evaluates
    # to 28.999999999999996 and truncates to 28.
    if total_num:
        good_subject_num = subject_num * good_total_num // total_num
    else:
        good_subject_num = 0

    # Rank a sorted copy of the column so the caller's frame keeps its row
    # order and index. NaN scores sort last and k never exceeds the non-null
    # count, so they are never part of the top k.
    ranked = data[subject].sort_values(ascending=False)
    good_subject_score = ranked.iloc[:good_subject_num].min()

    print(good_subject_num)
    print(good_subject_score)

    return good_subject_score


def dict_df():
    """Print a small name-to-value mapping as a dict and as a DataFrame.

    The mapping pairs the three sheet names with sample integers. It is
    printed first as a plain dict, then as a one-row DataFrame whose columns
    are the dict keys.
    """
    name = ['物理类', '历史类', '总表']
    score = [88, 99, 300]
    name_dict = dict(zip(name, score))
    print(name_dict)

    # A dict of bare scalars gives pandas no index to infer, so
    # pd.DataFrame(name_dict) raises ValueError. Wrapping the dict in a list
    # makes it a single record, i.e. one row.
    name_df = pd.DataFrame([name_dict])
    print(name_df)


if __name__ == '__main__':
    # Demo entry point. dict_df() needs no input data, so no workbook is
    # loaded here: reading one eagerly would tie this demo to a file that
    # only exists on one machine while the resulting frame goes unused.
    #
    # To try the cut-off helper against the sample workbook:
    #     file_path = r'D:\data_test\高2022级零诊成绩测试数据.xlsx'
    #     df = pd.read_excel(file_path, sheet_name='物理类')
    #     get_subject_good_score(df, '化学', 370)
    dict_df()
